# Global simulation switches.
[general]
# NOTE(review): presumably controls whether the [perf_model/l1_icache] settings
# below take effect — confirm against the simulator documentation.
enable_icache_modeling = true

# Per-core performance model.
[perf_model/core]
# Core clock in GHz (the L3 latency note in [perf_model/l3_cache] converts ns to cycles at 2.66 GHz).
frequency = 2.666
logical_cpus = 1 # number of SMT threads per core
# NOTE(review): presumably selects the model tuned in [perf_model/core/interval_timer] — confirm.
type = interval
# NOTE(review): the specs cited elsewhere in this file are for Dunnington / Intel 7300,
# while the core model is named nehalem — presumably the closest available model; confirm.
core_model = nehalem

# Sizing parameters for the interval timer (see type = interval in [perf_model/core]).
# NOTE(review): units are not stated in this file; presumably instructions per cycle,
# in-flight instructions and in-flight memory operations respectively — confirm.
[perf_model/core/interval_timer]
dispatch_width = 4
window_size = 96
num_outstanding_loadstores = 8

# Synchronization cost model.
[perf_model/sync]
# NOTE(review): unit is not stated in this file (cycles vs. ns) — confirm before tuning.
reschedule_cost = 1000

# Cache coherence protocol selection. The directory it refers to is sized in
# [perf_model/dram_directory].
[caching_protocol]
type = parametric_dram_directory_msi

# Branch predictor model and the cost charged per misprediction.
[perf_model/branch_predictor]
type = pentium_m
# NOTE(review): unit presumably core cycles — confirm.
mispredict_penalty=15 # From microarchitecture.pdf

# Cache hierarchy depth. The three levels are configured in [perf_model/l1_icache] /
# [perf_model/l1_dcache], [perf_model/l2_cache] and [perf_model/l3_cache].
[perf_model/cache]
levels = 3

# L1 instruction cache.
[perf_model/l1_icache]
# NOTE(review): size unit is not stated in this file — presumably KB; confirm.
cache_size = 32
associativity = 8
address_hash = mask
replacement_policy = lru
# Access times are in core cycles: the L3 data_access_time note subtracts the
# L1 and L2 tag access times (1 + 3) from a 100-cycle total.
data_access_time = 3
tags_access_time = 1
perf_model_type = parallel
writethrough = 0
# NOTE(review): shared_cores = 1 presumably means the cache is private to each core — confirm.
shared_cores = 1

# L1 data cache. Same geometry and access times as [perf_model/l1_icache], plus a writeback cost.
[perf_model/l1_dcache]
# NOTE(review): size unit is not stated in this file — presumably KB; confirm.
cache_size = 32
associativity = 8
address_hash = mask
replacement_policy = lru
# Access times are in core cycles (see the arithmetic in the L3 data_access_time note).
data_access_time = 3
tags_access_time = 1
# Excludes the L2 hit time, which is added on top of this value.
writeback_time = 150 # L2 hit time will be added
perf_model_type = parallel
writethrough = 0
# NOTE(review): shared_cores = 1 presumably means the cache is private to each core — confirm.
shared_cores = 1

# L2 cache, shared between pairs of cores (shared_cores = 2).
[perf_model/l2_cache]
# NOTE(review): size unit is not stated in this file — presumably KB; confirm.
cache_size = 3072
associativity = 12
# This is the only cache level using "mod"; L1 and L3 use "mask".
# NOTE(review): presumably tied to the non-power-of-two size/associativity — confirm.
address_hash = mod
replacement_policy = lru
# Access times are in core cycles (see the arithmetic in the L3 data_access_time note).
data_access_time = 14 # According to http://www.realworldtech.com/page.cfm?ArticleID=RWT040208182719&p=7
tags_access_time = 3
# Excludes the L3 hit time, which is added on top of this value.
writeback_time = 60 # L3 hit time will be added
perf_model_type = parallel
writethrough = 0
shared_cores = 2

# L3 cache: the last level of the 3-level hierarchy declared in [perf_model/cache].
[perf_model/l3_cache]
perfect = false
# NOTE(review): presumably bytes; the network-time note in [perf_model/dram] is
# consistent with a 64 B cache line — confirm.
cache_block_size = 64
# NOTE(review): size unit is not stated in this file — presumably KB; confirm.
cache_size = 16384
associativity = 16
address_hash = mask
replacement_policy = lru
# 38 ns load-to-use latency measured with lmbench ~= 100 cycles at 2.66 GHz;
# subtracting the L1 (1) and L2 (3) tag access times leaves 96 cycles.
data_access_time = 96 # 38ns according to lmbench, =100 cycles @ 2.66GHz, -L1 & L2 tag access
tags_access_time = 10
writeback_time = 390
perf_model_type = parallel
writethrough = 0
# Same value as cores_per_socket in [dvfs/simple].
shared_cores = 6
prefetcher = none
dvfs_domain = global

# Coherence directory sizing, based on the Intel 7300 Northbridge.
[perf_model/dram_directory]
# Intel 7300 Northbridge Specs: http://www.intel.com/Products/Server/Chipsets/7300/7300-overview.htm
# The 7300 tracks 1,048,576 (2^20) cache lines in a 16-way configuration.
# total_entries = number of entries per directory controller.
total_entries = 1048576
associativity = 16
directory_type = full_map

# DRAM controller count, latency and bandwidth.
[perf_model/dram]
# num_controllers = -1 means a number of distributed DRAM controllers (4 in this case).
# Setting num_controllers to 1 would model the number of DRAM controllers more accurately,
# but could reduce the DRAM bandwidth below the level of the 7300 Northbridge.
num_controllers = -1
# NOTE(review): meaning is not documented in this file; 6 matches the L3 shared_cores and
# cores_per_socket values, so presumably one controller per 6 cores — confirm.
controllers_interleaving = 6
# DRAM access latency in nanoseconds. Should not include L1-LLC tag access time,
# directory access time (24 cycles = 9 ns at 2.666 GHz), or network time
# [(cache line size + 2*{overhead=40}) / network bandwidth = (64 + 80) B / 8 GB/s = 18 ns].
latency = 173
per_controller_bandwidth = 2.5              # In GB/s, as measured by core_validation-dram

# Interconnect model selection: both memory networks use the bus configured in [network/bus].
[network]
memory_model_1 = bus
memory_model_2 = bus

# Parameters of the bus model selected by memory_model_1/2 = bus in [network].
[network/bus]
# Dunnington Specs: http://www.intel.com/assets/PDF/datasheet/320335.pdf
#  1066 MT/s
#  64-bit bus
#  Max per-cpu BW = 1066 MT/s * 8 B = 8.528 GB/s (not GiB/s)
# Intel 7300 Chipset Overview: http://www.intel.com/products/server/chipsets/7300/7300-overview.htm
#  PtP (CPU to chipset) Directory-based cache coherency
# Intel 7300 Chipset Details: http://www.intel.com/products/server/chipsets/7300/7300-technicaldocuments.htm
# Configured value is below the 8.528 GB/s maximum quoted above; the DRAM network-time
# note in [perf_model/dram] assumes this 8 GB/s figure.
bandwidth = 8 # in GB/s
ignore_local_traffic = false # Memory controllers are off-chip, so traffic from core0 to dram0 does go over the bus

# Scheme used to limit clock skew; the barrier scheme is tuned in
# [clock_skew_minimization/barrier].
[clock_skew_minimization]
scheme = barrier

# Settings for the barrier scheme selected in [clock_skew_minimization].
[clock_skew_minimization/barrier]
# NOTE(review): unit is not stated in this file — presumably ns of simulated time between barriers; confirm.
quantum = 100

# Dynamic voltage/frequency scaling.
[dvfs]
# Frequency transition latency in nanoseconds; the sysfs path names the Linux file
# that reports this quantity on real hardware.
transition_latency = 10000 # In ns, /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_transition_latency

# Socket topology for the "simple" DVFS settings.
[dvfs/simple]
# Same value as shared_cores in [perf_model/l3_cache].
cores_per_socket = 6

# Electrical/process parameters.
# NOTE(review): the consumer of these values is not visible in this file (presumably a power model) — confirm.
[power]
vdd = 1.6 # Volts
technology_node = 45 # nm
